In [1]:
import pandas as pd, numpy as np
import kendo_romania

Read data


In [2]:
matches={2018:{},2017:{},2016:{},2015:{},2014:{}}

In [3]:
filename='rawdata/2018/CR/CR25 - Public.xlsx'
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10,
             'shinpan':{'fukushin1':16,'shushin':17,'fukushin2':18}}
matches[2018]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3)

In [4]:
filename='rawdata/2018/SL/Prezenta SL_WKC17.xlsx'
sheetname=['F','M']
matches[2018]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,5)

In [5]:
categories=['Individual masculin','Echipe']
filename=['rawdata/2017/CN/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10,
             'shinpan':{'fukushin1':16,'shushin':17,'fukushin2':18}}
shift=0
matches[2017]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [6]:
categories=['Individual juniori mici','Individual juniori mari','Individual feminin']
filename=['rawdata/2017/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2017]['CN']=matches[2017]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [7]:
categories=['Individual masculin']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10}
shift=2
matches[2017]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [8]:
categories=['Individual juniori','Individual veterani','Individual feminin']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
shift=-1
matches[2017]['CR']=matches[2017]['CR']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [9]:
categories=['Echipe']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
shift=0
matches[2017]['CR']=matches[2017]['CR']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [10]:
filename='rawdata/2017/SL/Prezenta.xlsx'
sheetname=['F','M','J']
matches[2017]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

In [11]:
filename='rawdata/2016/SL/Event management - stagiul 4.xlsx'
sheetname=['F','M']
matches[2016]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

In [12]:
sheetname=['J']
matches[2016]['SL']=matches[2016]['SL']+\
        kendo_romania.get_matches_from_table(filename,sheetname,5)

In [13]:
categories=['Individual masculin']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10}
shift=2
matches[2016]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [14]:
categories=['Individual feminin']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2016]['CN']=matches[2016]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [15]:
categories=['Echipe','Male team']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=0
matches[2016]['CN']=matches[2016]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [16]:
categories=['Junior 1 individual','Junior 2 individual']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2016]['CN']=matches[2016]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [17]:
filename='rawdata/2016/CR/Event management_CR23.2016.xlsx'
sheetname=['IF_m','IJ_m','IM_m','IS_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2016]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [18]:
sheetname=['EJ_m','ES_m']
matches[2016]['CR']=matches[2016]['CR']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [19]:
filename='rawdata/2014/SL/Lista de participanti 6.xlsx'
sheetname=['SF_s','SM_s','J_s']
matches[2014]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

In [20]:
filename='rawdata/2015/SL/Event management - stagiul 5.xlsx'
sheetname=['SF_s','SM_s']
matches[2015]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

In [21]:
filename='rawdata/2015/CN/Event management_CN22.2015.xlsx'
sheetname=['IF_m','IJ2_m','IM_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2015]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [22]:
sheetname='E_m'
matches[2015]['CN']=matches[2015]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [23]:
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IF_m','IS_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2015]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [32]:
matches[2015]['CR'][0]


Out[32]:
{'match_type': 'rawdata/2015/CR/Event management_CR22.2015.xlsx#1',
 'aka': {'name': 'Burinaru  Alexandra', 'point1': 'M'},
 'shiro': {'name': 'Gușu Rebeca', 'point1': nan},
 'outcome': 1,
 'shinpan': {'fukushin1': 'Luca M.',
  'shushin': 'Mahika S.',
  'fukushin2': 'Győrfi S.'}}

Clean up points, matches, player names


In [701]:
def match_cleaner(match):
    kind,phase='Unknown','Unknown'
    if '#' in match:
        stage0=match.split('#')[0].lower()
        stage1=match.split('#')[1]
        if 'pool' in stage1: 
            phase='Pool'
        if 'Pool' in stage1: 
            phase='Pool'
        elif 'prel' in stage1: 
            phase='Prelim.'
        elif 'Prel' in stage1: 
            phase='Prelim.'
        elif 'layoff' in stage1: 
            phase='Prelim.'
        elif '- F' in stage1: 
            phase='Finals'
        elif 'F -' in stage1: 
            phase='Finals'
        elif 'Final' in stage1: 
            phase='Finals'
        elif 'SF' in stage1: 
            phase='Finals'
        elif 'QF' in stage1: 
            phase='Finals'
        elif 'A'==stage1: phase='Pool'
        elif 'B'==stage1: phase='Pool'
        elif 'C'==stage1: phase='Pool'
        elif 'D'==stage1: phase='Pool'
        elif 'E'==stage1: phase='Pool'
        elif 'F'==stage1: phase='Pool'
        elif 'G'==stage1: phase='Pool'
        elif 'H'==stage1: phase='Pool'
        elif 'I'==stage1: phase='Pool'
        elif 'J'==stage1: phase='Pool'
        elif 'K'==stage1: phase='Pool'
        elif 'L'==stage1: phase='Pool'
        elif 'M'==stage1: phase='Pool'
        elif 'N'==stage1: phase='Pool'
        elif 'O'==stage1: phase='Pool'
        elif 'P'==stage1: phase='Pool'
        elif 'Q'==stage1: phase='Pool'
        elif 'R'==stage1: phase='Pool'
        elif 'S'==stage1: phase='Pool'
        elif 'T'==stage1: phase='Pool'
        
        if 'IS' in stage1:
            kind="Senior's Individual"
        elif 'IF' in stage1:
            kind="Women's Individual"
        elif 'IM' in stage1:
            kind="Men's Individual"
        elif 'IC' in stage1:
            kind="Children's Individual"
        elif 'IJ' in stage1:
            kind="Junior's Individual"
        elif 'EJ' in stage1:
            kind="Junior's Team"
        elif 'EF' in stage1:
            kind="Men's Team"
        elif 'ES' in stage1:
            kind="Senior's Team"
            
        if 'individual masculin.' in stage0:
            kind="Men's Individual"
        if 'echipe.' in stage0:
            kind="Mixed Team"
        if 'individual juniori' in stage0:
            kind="Junior's Team"
        if 'individual feminin' in stage0:
            kind="Junior's Team"
        if 'individual veterani' in stage0:
            kind="Senior's Team"
        if 'male team' in stage0:
            kind="Men's Team"
        if 'junior 1 individual' in stage0:
            kind="Junior's Individual"
        if 'junior 2 individual' in stage0:
            kind="Junior's Individual"
        
    elif match=='F':
        kind="Women's Individual"
    elif match=='M':
        kind="Men's Individual"
    elif match=='J':
        kind="Junior's Individual"
    elif match=='SF_s':
        kind="Women's Individual"
    elif match=='SM_s':
        kind="Men's Individual"
    elif match=='J_s':
        kind="Junior's Individual"
    
    return kind,phase

In [702]:
name_exceptions={'Atanasovski':'Atanasovski X. (XXX)',
                 'Dobrovicescu (SON)':'Dobrovicescu M. (SON)'}
def name_cleaner(name):
    if name not in name_exceptions:
        nc=name.replace('  ',' ').split('(')
    else:
        nc=name_exceptions[name].split('(')
    if len(nc)<2:
        nc=[nc[0],'XXX']
    rname=nc[0].strip()
    rclub=nc[1].replace('(','').replace(')','').strip()
    rnames=rname.split(' ')
    sname=rnames[0]+' '+rnames[1][0]+'.'
    return sname, rclub

In [703]:
name_exceptions={'Atanasovski':'Atanasovski A. (MAC)',
                 'Dobrovicescu (SON)':'Dobrovicescu M. (SON)',
                 'Ianăș':'Ianăș F.',
                 'Arabadjiyski': 'Arabadjiyski A.',
                 'Mandia':'Mandia F.',
                 'Stanev':'Stanev A.',
                 'Mochalov':'Mochalov O.',
                 'Sozzi':'Sozzi A.',
                 'Crăciunel':'Crăciunel I.',
                 'Craciunel':'Crăciunel I.',
                 'Sagaev':'Sagaev L.',
                 'Buzás':'Buzás C.',
                 'Csala':'Csala D.',
                 'Dimitrov':'Dimitrov M.',
                 'Józsa':'Józsa L.',
                 'Creangă':'Creangă A.',
                 'Duțescu':'Duțescu M.',                 
                 'Furtună':'Furtună G.',
                 'Gârbea':'Gârbea I.',
                 'Stupu':'Stupu I.',
                 'Mahika-Voiconi':'Mahika-Voiconi S.',
                 'Mahika':'Mahika-Voiconi S.',
                 'Stanciu':'Stanciu F.',
                 'Vrânceanu':'Vrânceanu R.',
                 'Luca':'Luca M.',
                 'Wolfs':'Wolfs J.',
                 'Ducarme':'Ducarme A.',
                 'Sbârcea':'Sbârcea B.',
                 'Mocian':'Mocian A.',
                 'Hatvani':'Hatvani L.',
                 'Dusan':'Dusan N.',
                 'Borota':'Borota V.',
                 'Tsushima':'Tsushima K.',
                 'Tráser':'Tráser T.',
                 'Colțea':'Colțea A.',
                 'Brîcov':'Brîcov A.'}
redflags_names=['-','—','—',np.nan,'. ()','— ','- -.','- -. (-)',
                'Kashi','Sankon','București','Victorii:','Sakura','Taiken','Ikada','Sonkei','CRK','Museido',
                'Ichimon','Bushi Tokukai 1','Competitori – Shiai-sha','Echipa - roşu','Numele şi prenumele',
                'Victorii:','Victorii: 0','Victorii: 1','Victorii: 2','Victorii: 3','Victorii: 4',
                'Victorii: 5','?']
name_equals={'Chirea A.':'Chirea M.',
            'Ghinet C.':'Ghineț C.',
            'Domnița M.':'Domniță M.',
            'Garbea I.':'Gârbea I.',
            'Horvát M.':'Horváth M.',
            'Ionita A.':'Ioniță A.',
            'Medvedschi I.':'Medvețchi I.',
            'Mahika S.':'Mahika-Voiconi S.',
            'Mate L.':'Máté L.',
            'Stupu A.':'Stupu I.'}
letter_norm={'ţ':'ț','ş':'ș'}
def name_cleaner(name):
    for letter in letter_norm:
        name=name.replace(letter,letter_norm[letter])
    if name not in name_exceptions:
        nc=name.replace('  ',' ').split('(')    
    else:
        nc=name_exceptions[name].split('(')
    rname=nc[0].strip()
    rnames=rname.split(' ')
    sname=rnames[0]+' '+rnames[1][0]+'.'
    if sname in name_equals:
        sname=name_equals[sname]
    return sname

Standardize names


In [704]:
all_players={}
all_players_r={}
all_players_unsorted=set()
for year in matches:
    for competition in matches[year]:
        for match in matches[year][competition]:
            for color in ['aka','shiro']:
                name=match[color]['name']
                all_players_unsorted.add(name)
                if name not in redflags_names:
                    name=name_cleaner(name)
                    rname=match[color]['name']
                    if rname not in all_players_r:all_players_r[rname]=name
                    if name not in all_players: all_players[name]={}
                    if year not in all_players[name]:all_players[name][year]={'names':set()}
                    all_players[name][year]['names'].add(rname)

In [705]:
all_shinpan={}
all_shinpan_r={}
all_shinpan_unsorted=set()
for year in matches:
    for competition in matches[year]:
        for match in matches[year][competition]:
            if 'shinpan' in match:
                for color in ['fukushin1','shushin','fukushin2']:
                    aka=match['aka']['name']
                    shiro=match['shiro']['name']
                    if (aka not in redflags_names) and\
                       (shiro not in redflags_names) and\
                       (name_cleaner(aka) in all_players) and\
                       (name_cleaner(shiro) in all_players):
                        rname=match['shinpan'][color]
                        all_shinpan_unsorted.add(rname)
                        if (rname not in redflags_names):
                            name=name_cleaner(rname)
                            if name not in all_shinpan: all_shinpan[name]=[] 
                            all_shinpan[name].append(match)
                            if rname not in all_shinpan_r:all_shinpan_r[rname]=name

Infer clubs


In [706]:
#naive infer
redflags_clubs=['','N/A','RO1','RO2']
club_equals={'MLD':'MOL',
             'IKD':'IKA',
             'BUL':'BUL/Bg',
             'TUR':'TUR/Tr',
             'MAC':'MAC/Mc',
             'MNE':'MNE/Mn',
             'SRB':'SRB/Sr',
             'ITA':'ITA/It',
             'Musso, Bg':'MUS/Bg',
             'Makoto, Sr':'MAK/Sr',
             'Szeged, Hu':'SZE/Hu'}
for name in all_players:
    for year in all_players[name]:
        for name_form in all_players[name][year]['names']:
            if '(' in name_form:
                club=name_form.split('(')[1].strip()[:-1]
                if club in club_equals: club=club_equals[club]
                if club not in redflags_clubs:
                    all_players[name][year]['club']=club

In [707]:
for name in all_players:
    for year in all_players[name]:
        if 'club' not in all_players[name][year]:
            #more than 1 year?
            years=np.sort(list(all_players[name].keys()))
            if len(years)>1:
                #get club from previous year
                for y in range(years[0],year):
                    if y in all_players[name]:
                        if 'club' in all_players[name][y]:
                            all_players[name][year]['club']=all_players[name][y]['club']
                #if still not found, get club from next year
                if 'club' not in all_players[name][year]:
                    #get club from next year
                    for y in np.arange(years[-1],year,-1):
                        if y in all_players[name]:
                            if 'club' in all_players[name][y]:
                                all_players[name][year]['club']=all_players[name][y]['club']

In [708]:
clubs_manual={'Balázs-Kercsó Z.':'BTK',
             'Vrânceanu M.':'SAN',
             'Duțescu M.':'IKA',
             'Crăciun D.':'SAM',
             'Nagy V.':'ISS',
             'Goró L.':'BTK',
             'Ghineț G.':'YUK',
             'Cioată E.':'KAS',
             'Perianu S.':'KNS'}
for name in all_players:
    for year in all_players[name]:
        if 'club' not in all_players[name][year]:
            #if still not found, print error, infer other way
            if name in clubs_manual:
                all_players[name][year]['club']=clubs_manual[name]
            else:
                print('error',name,year,all_players[name])
                all_players[name][year]['club']='XXX'


error Magiaru A. 2016 {2016: {'names': {'Magiaru Alexandru'}}}
error Costea C. 2015 {2015: {'names': {'Costea Cătălin'}}}
error Constantin D. 2015 {2015: {'names': {'Constantin Doru'}}}
error Molnár E. 2015 {2015: {'names': {'Molnár Emese'}}}
error Canceu A. 2015 {2015: {'names': {'Canceu Adriana-Maria'}}}
error Trandafir A. 2015 {2015: {'names': {'Trandafir Ana'}}}
error Matea C. 2015 {2015: {'names': {'Matea Coralia-Ioana'}}}
error Tamang S. 2015 {2015: {'names': {'Tamang Sujata'}}}
error Cismaru D. 2015 {2015: {'names': {'Cismaru Daniela'}}}
error Tütsek K. 2015 {2015: {'names': {'Tütsek Kinga'}}}
error Iurea G. 2015 {2015: {'names': {'Iurea Gheorghe'}}}

In [709]:
clubs={}
for name in all_players:
    for year in all_players[name]:
        club=all_players[name][year]['club'] 
        if club not in clubs:clubs[club]={}
        if year not in clubs[club]:clubs[club][year]=set()
        clubs[club][year].add(name)

In [710]:
def outcome_cleaner(outcome):
    if outcome=='E': return True
    else: return False

In [711]:
def outcome_from_points(aka,shiro):
    if aka==shiro: return 'X',0
    elif aka>shiro: return 'A',str(aka-shiro)
    else: return 'S',str(shiro-aka)

In [712]:
redflags_points=['Puncte']
def points_cleaner(points):
    hansoku=0
    if '∆' in points:
        hansoku=1
        points=points.replace('∆','')
    if len(points)>2:
        print(points,'error')
    elif len(points)>1:
        point1=points[0]
        point2=points[1]
    elif len(points)>0:
        point1=points[0]
        point2=''
    else:
        point1=''
        point2=''
    return point1,point2,len(points),hansoku

In [713]:
def club_cleaner(club):
    if '/' in club:
        return club.split('/')[0],club.split('/')[1].upper()
    else:
        return club,'RO'

In [714]:
pretty_clubs={'ARA':'Arashi', 'BSD':'Bushido', 'BTK':'Bushi Tokukai', 'BG':'Bulgaria',
              'CDO':'Coroan de Oțel', 'CRK':'Clubul Român de Kendo', 'HAR':'Hargita', 
              'ICH':'Ichimon', 'IKA':'Ikada','ISS':'Ishhin', 'IT':'Italy','HU':'Hungary',
              'KAS':'Kashi', 'KNS':'Kenshin', 'KYO':'Kyobukan', 'MC':'Macedonia',
              'SR':'Serbia', 'MN':'Montenegro', 'MOL':'Moldova', 'MUS':'Museido', 
               'RON':'Ronin-do', 'SAK':'Sakura', 'SAM':'Sam-sho','SAN':'Sankon', 'SBK':'Sobukan',
               'SON':'Sonkei', 'SR':'Serbia', 'TAI':'Taiken', 'TR':'Turkey', 'XXX':'Unknown',
               'YUK':'Yu-kai'}
def pretty_club(club, country):
    if country!='RO':
        return pretty_clubs[country]
    else: return pretty_clubs[club]

In [715]:
master_matches=[]
for year in matches:
    for competition in matches[year]:
        print(year,competition)
        for k in matches[year][competition]:
            good=True
            match={'year':year,'competition':competition}
            match['match_kind'],match['match_phase']=match_cleaner(k['match_type'])
            if 'shinpan' in k:
                for color in ['fukushin1','shushin','fukushin2']:
                    if color in k['shinpan']:
                        if k['shinpan'][color] in all_shinpan_r:
                            match[color]=all_shinpan_r[k['shinpan'][color]]
            for a in ['aka','shiro']:
                points=''
                for h in k[a]:
                    if h=='name':
                        name=k[a][h]
                        if name in all_players_r:
                            match[a+' name']=all_players_r[name]
                            club, country=club_cleaner(all_players[match[a+' name']][year]['club'])
                            match[a+' club'], match[a+' country']=club, country
                            match[a+' pretty_club']=pretty_club(club, country)
                        else:
                            good=False
                    else:
                        point=k[a][h]
                        if str(point)=='nan': point=''
                        points=points+point
                for redflag in redflags_points:
                    if redflag in points:
                        good=False
                if good:
                    match[a+' point1'],match[a+' point2'],match[a+' points'],match[a+' hansoku']=points_cleaner(points)
            if good:
                if 'outcome' in k:
                    match['encho']=outcome_cleaner(k['outcome'])
                else: 
                    match['encho']=False
                match['winner'],match['difference']=outcome_from_points(match['aka points'],match['shiro points'])

                master_matches.append(match)


2018 CR
2018 SL
2017 CN
2017 CR
2017 SL
2016 SL
2016 CN
2016 CR
2015 SL
2015 CN
2015 CR
2014 SL

In [716]:
data=pd.DataFrame(master_matches)

Cleanup


In [717]:
data.to_csv('data/matches.csv')

Group by player


In [718]:
aka=data[[i for i in data.columns if 'shiro ' not in i]]
aka.columns=[i.replace('aka ','') for i in aka.columns]
aka['color']='aka'


C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until

In [719]:
shiro=data[[i for i in data.columns if 'aka ' not in i]]
shiro.columns=[i.replace('shiro ','') for i in shiro.columns]
shiro['color']='shiro'


C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until

In [720]:
extended_matches=pd.concat([aka,shiro],axis=0).reset_index(drop=True)


C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  """Entry point for launching an IPython kernel.

In [721]:
extended_matches.to_csv('data/extended_matches.csv')

In [722]:
p1=extended_matches[[i for i in extended_matches.columns if i!='point2']]
p2=extended_matches[[i for i in extended_matches.columns if i!='point1']]
p1.rename(columns={'point1':'point'}, inplace=True)
p2.rename(columns={'point2':'point'}, inplace=True)


C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py:3781: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)

In [723]:
extended_points=pd.concat([p1,p2],axis=0).reset_index(drop=True)

In [724]:
extended_points.to_csv('data/extended_points.csv')

In [725]:
extended_points


Out[725]:
club color competition country difference encho fukushin1 fukushin2 hansoku match_kind match_phase name point points pretty_club shushin winner year
0 SBK aka CR RO 2 False NaN NaN 0 Senior's Individual Pool Ciorabai E. 0 Sobukan NaN S 2018
1 JAS aka CR HU 2 False Ianăș F. Mandia F. 0 Senior's Individual Pool Szőke S. 0 Hungary Arabadjiyski A. S 2018
2 SBK aka CR RO 2 False NaN NaN 0 Senior's Individual Pool Ciorabai E. 0 Sobukan NaN S 2018
3 ARA aka CR RO 1 False Ianăș F. Mandia F. 0 Senior's Individual Pool Furtună G. 0 Arashi Arabadjiyski A. S 2018
4 SBK aka CR RO 2 False NaN NaN 0 Senior's Individual Pool Ciorabai E. 0 Sobukan NaN S 2018
5 ARA aka CR RO 1 False Ianăș F. Mandia F. 0 Senior's Individual Pool Furtună G. 0 Arashi Arabadjiyski A. S 2018
6 MOL aka CR RO 0 False Györfi G. Ianăș F. 0 Senior's Individual Pool Oncea V. 0 Moldova Mandia F. X 2018
7 MOL aka CR RO 2 False Györfi G. Ianăș F. 0 Senior's Individual Pool Oncea V. K 2 Moldova Mandia F. A 2018
8 SBK aka CR RO 2 False Györfi G. Ianăș F. 0 Senior's Individual Pool Moise A. 0 Sobukan Mandia F. S 2018
9 KAS aka CR RO 2 False Stanev A. Györfi G. 0 Senior's Individual Pool Raicea V. K 2 Kashi Ianăș F. A 2018
10 MOL aka CR RO 2 False Stanev A. Györfi G. 0 Senior's Individual Pool Ulinici A. M 2 Moldova Ianăș F. A 2018
11 KAS aka CR RO 2 False Stanev A. Györfi G. 0 Senior's Individual Pool Raicea V. M 2 Kashi Ianăș F. A 2018
12 YUK aka CR RO 1 False Mochalov O. Stanev A. 0 Senior's Individual Pool Cireașă D. M 1 Yu-kai Györfi G. S 2018
13 KAS aka CR RO 2 False Mochalov O. Stanev A. 0 Senior's Individual Pool Raicea V. M 2 Kashi Györfi G. A 2018
14 YUK aka CR RO 0 False Mochalov O. Stanev A. 0 Senior's Individual Pool Cireașă D. 0 Yu-kai Györfi G. X 2018
15 MOL aka CR RO 1 False Arabadjiyski A. Mochalov O. 0 Senior's Individual Prelim. Ulinici A. 0 Moldova Stanev A. S 2018
16 BTK aka CR RO 1 False Arabadjiyski A. Mochalov O. 0 Senior's Individual Prelim. Antal R. 0 Bushi Tokukai Stanev A. S 2018
17 TAI aka CR RO 2 False Arabadjiyski A. Mochalov O. 0 Senior's Individual Finals Cristea C. 0 Taiken Stanev A. S 2018
18 JAS aka CR HU 2 False Mandia F. Arabadjiyski A. 0 Senior's Individual Finals Szőke S. 0 Hungary Mochalov O. S 2018
19 MOL aka CR RO 1 False Mandia F. Arabadjiyski A. 0 Senior's Individual Finals Oncea V. 0 Moldova Mochalov O. S 2018
20 KYO aka CR RO 1 False Sozzi A. Sagaev L. 0 Women's Individual Finals Burinaru A. M 1 Kyobukan Crăciunel I. A 2018
21 SAM aka CR RO 2 False Sozzi A. Sagaev L. 1 Women's Individual Finals Gherman O. M 2 Sam-sho Crăciunel I. A 2018
22 KYO aka CR RO 2 False Sozzi A. Sagaev L. 0 Women's Individual Finals Burinaru A. M 2 Kyobukan Crăciunel I. A 2018
23 MUS aka CR BG 0 False Buzás C. Sozzi A. 1 Women's Individual Finals Madzharova B. 0 Bulgaria Sagaev L. X 2018
24 KYO aka CR RO 2 False Csala D. Sozzi A. 0 Women's Individual Finals Burinaru A. M 2 Kyobukan Sagaev L. A 2018
25 MUS aka CR BG 0 False Csala D. Sozzi A. 0 Women's Individual Finals Madzharova B. 0 Bulgaria Sagaev L. X 2018
26 YUK aka CR RO 0 False Dimitrov M. Buzás C. 0 Women's Individual Finals Ghineț C. 0 Yu-kai Sozzi A. X 2018
27 MUS aka CR BG 2 False Dimitrov M. Buzás C. 0 Women's Individual Finals Kasnakova D. M 2 Bulgaria Sozzi A. A 2018
28 YUK aka CR RO 1 False Dimitrov M. Buzás C. 0 Women's Individual Finals Ghineț C. 0 Yu-kai Sozzi A. S 2018
29 SON aka CR RO 2 False Crăciunel I. Dimitrov M. 0 Women's Individual Finals Mihăilă L. K 2 Sonkei Buzás C. A 2018
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6486 KAS shiro SL RO 0 False NaN NaN 0 Men's Individual Unknown Józsa L. 1 Kashi NaN X 2014
6487 KAS shiro SL RO 2 False NaN NaN 1 Men's Individual Unknown Raicea V. 0 Kashi NaN A 2014
6488 SAM shiro SL RO 2 False NaN NaN 0 Men's Individual Unknown Crăciun D. 0 Sam-sho NaN A 2014
6489 SAN shiro SL RO 1 False NaN NaN 0 Men's Individual Unknown Vrânceanu R. 0 Sankon NaN A 2014
6490 TAI shiro SL RO 0 False NaN NaN 0 Men's Individual Unknown Alexa A. 0 Taiken NaN X 2014
6491 TAI shiro SL RO 1 False NaN NaN 0 Men's Individual Unknown Mocian A. 1 Taiken NaN A 2014
6492 KAS shiro SL RO 0 False NaN NaN 0 Men's Individual Unknown Józsa L. 1 Kashi NaN X 2014
6493 KAS shiro SL RO 1 False NaN NaN 0 Men's Individual Unknown Raicea V. 1 Kashi NaN A 2014
6494 SAM shiro SL RO 2 False NaN NaN 0 Men's Individual Unknown Crăciun D. 0 Sam-sho NaN A 2014
6495 SAN shiro SL RO 0 False NaN NaN 0 Men's Individual Unknown Vrânceanu R. 0 Sankon NaN X 2014
6496 TAI shiro SL RO 1 False NaN NaN 0 Men's Individual Unknown Alexa A. 0 Taiken NaN A 2014
6497 TAI shiro SL RO 1 False NaN NaN 0 Men's Individual Unknown Mocian A. 1 Taiken NaN A 2014
6498 KAS shiro SL RO 0 False NaN NaN 0 Men's Individual Unknown Raicea V. 0 Kashi NaN X 2014
6499 SAM shiro SL RO 2 False NaN NaN 0 Men's Individual Unknown Crăciun D. 0 Sam-sho NaN A 2014
6500 SAN shiro SL RO 2 False NaN NaN 0 Men's Individual Unknown Vrânceanu R. 0 Sankon NaN A 2014
6501 TAI shiro SL RO 2 False NaN NaN 0 Men's Individual Unknown Alexa A. 0 Taiken NaN A 2014
6502 TAI shiro SL RO 2 False NaN NaN 0 Men's Individual Unknown Mocian A. K 2 Taiken NaN S 2014
6503 SAM shiro SL RO 1 False NaN NaN 0 Men's Individual Unknown Crăciun D. 1 Sam-sho NaN S 2014
6504 SAN shiro SL RO 0 False NaN NaN 0 Men's Individual Unknown Vrânceanu R. 0 Sankon NaN X 2014
6505 TAI shiro SL RO 1 False NaN NaN 0 Men's Individual Unknown Alexa A. 1 Taiken NaN S 2014
6506 TAI shiro SL RO 2 False NaN NaN 0 Men's Individual Unknown Mocian A. K 2 Taiken NaN S 2014
6507 SAN shiro SL RO 1 False NaN NaN 0 Men's Individual Unknown Vrânceanu R. 1 Sankon NaN A 2014
6508 TAI shiro SL RO 1 False NaN NaN 0 Men's Individual Unknown Alexa A. 0 Taiken NaN A 2014
6509 TAI shiro SL RO 1 False NaN NaN 0 Men's Individual Unknown Mocian A. M 2 Taiken NaN S 2014
6510 TAI shiro SL RO 2 False NaN NaN 0 Men's Individual Unknown Alexa A. M 2 Taiken NaN S 2014
6511 TAI shiro SL RO 1 False NaN NaN 0 Men's Individual Unknown Mocian A. 1 Taiken NaN A 2014
6512 TAI shiro SL RO 2 False NaN NaN 0 Men's Individual Unknown Mocian A. K 2 Taiken NaN S 2014
6513 ISS shiro SL RO 2 False NaN NaN 0 Junior's Individual Unknown Nagy V. 0 Ishhin NaN A 2014
6514 BTK shiro SL RO 2 False NaN NaN 0 Junior's Individual Unknown Horváth M. 0 Bushi Tokukai NaN A 2014
6515 BTK shiro SL RO 2 False NaN NaN 0 Junior's Individual Unknown Horváth M. 0 Bushi Tokukai NaN A 2014

6516 rows × 18 columns

Competitor statistics


In [726]:
competitors={}
for i in data.T.iteritems():
    for a in ['aka ','shiro ']:
        name=i[1][a+'name']
        club=i[1][a+'club']
        if name not in competitors:
            competitors[name]={'U':0,'club':club}
        for j in ['point1','point2']:
            point=i[1][a+j] 
            if point!='':
                if point not in competitors[name]:competitors[name][point]=0
                competitors[name][point]+=1
        competitors[name]['U']+=1

In [727]:
data2=pd.DataFrame(competitors)

In [728]:
data2.T.to_csv('data/competitors.csv')